data & libraries
load(here::here("tmpdata/dfinfo.rda"))
load(here::here("tmpdata/sel1.data.l.anno.rda"))
df.all = do.call(rbind,
lapply(1:length(sel1.data.l.anno), function(i) {
df = sel1.data.l.anno[[i]]
df$rf = names(sel1.data.l.anno)[[i]]
df2 = df %>%
mutate(is.pair = ifelse(is.na(name), 0, 1)) %>%
mutate(Z = (score_gplmDCA - mean(score_gplmDCA)) /sd(score_gplmDCA))
return(df2)
}))
funcs
library(RColorBrewer)
myColors <- brewer.pal(4,"Set1")
names(myColors) <- c("TP","TN","FP","FN")
col_scale <- scale_colour_manual(name = "type",values = myColors)
plot_tpfpfn = function(df) {
L = max(df$V2) / 2
len = max(df$V2)
tbl.anno =
df %>%
mutate(is.pair = ifelse(is.na(name), 0, 1)) %>%
# mutate(is.pred = ifelse(rank(-score_gplmDCA) < L, 1, 0)) %>%
mutate(Z=(score_gplmDCA-mean(score_gplmDCA))/sd(score_gplmDCA),
is.pred = Z > 3) %>%
mutate(
type = ifelse(is.pred > 0 & is.pair > 0, "TP", NA),
type = ifelse(is.pred > 0 & is.pair == 0, "FP", type),
type = ifelse(is.pred == 0 & is.pair > 0, "FN", type),
type = ifelse(is.pred == 0 & is.pair == 0, "TN", type)
)
gg = tbl.anno %>%
filter(is.pred > 0 | is.wc > 0) %>%
ggplot() +
geom_point(aes(x = V2, y = V1, color = type)) +
coord_fixed()+
xlim(1,len)+
# ylim(1,len)+
col_scale+scale_y_reverse(limits = c(len,1))
gg
}
exp
df = sel1.data.l.anno$RF00032
plot_tpfpfn(df)

# df.all.anno =
df.all.summ1 =
df.all %>%
group_by(rf) %>%
mutate(p_wc = sum(is.wc),
p_nwc = sum(is.nwc),
p_pair = sum(is.pair)) %>%
filter(Z > 3) %>%
summarise(sen_wc = sum(is.wc)/max(p_wc),
sen_nwc = sum(is.nwc)/max(p_nwc),
sen_pair = sum(is.pair)/max(p_pair)) %>%
left_join(dfinfo) %>%
arrange(meff)
## Joining, by = "rf"
# # df.all.summ1.sum_again =
# df.all.summ1 %>%
# summarise(n_gt80 = sum(f_pair > 0.5))
table(df.all.summ1$sen_wc > 0.75)
##
## FALSE TRUE
## 20 26
df.all %>%
ggplot(aes(x=is.wc>0,y=score_gplmDCA))+geom_boxplot()
contact maps
ggs = lapply(1:length(sel1.data.l.anno), function(i){
df = sel1.data.l.anno[[i]]
gg =plot_tpfpfn(df)
rf = names(sel1.data.l.anno)[[i]]
meff = dfinfo$meff[which(dfinfo$rf == rf)]
L = dfinfo$L[which(dfinfo$rf == rf)]
gg = gg+ ggtitle(paste0(rf, " meff: ", meff, " NT: ", L))
return(gg)
})
# gridExtra::marrangeGrob(grobs = ggs, nrow=2,ncol=2)
plot_tpr = function(pred, label){
df = data.frame(pred=pred, label = label) %>%
arrange(desc(pred)) %>%
mutate(rank_it= rank(-pred),
TP =cumsum(label)/ rank_it)
return(df)
}
tmpdf = sel1.data.l.anno$RF01734
df = plot_tpr(tmpdf$score_gplmDCA,tmpdf$is.wc)
plot_tpfpfn(tmpdf)

testit2 = function(preds, label, L){
P=sum(label)
rslt = numeric(ncol(preds))
for (i in 1:ncol(preds)){
tmpdf = data.frame(s=preds[,i], l =label)
rslt[i] = sum(tmpdf %>% arrange(desc(s)) %>% slice(1:round(L))%>% pull(l))/P
}
names(rslt) = names(preds)
return(rslt)
}
plot_contacts = function(df){
L = max(df$V2) /2
len = max(df$V2)
tbl.anno =
df %>%
mutate(is.contact_8 = ifelse(abs(dist) < 12, 1,0)) %>%
mutate(is.pair = ifelse(is.na(name), 0, 1)) %>%
mutate(is.pred = ifelse(rank(-score_gplmDCA) < L, 1, 0)) %>%
mutate(
type = ifelse(is.pred > 0 & is.contact_8 > 0, "TP", NA),
type = ifelse(is.pred > 0 & is.contact_8 == 0, "FP", type),
type = ifelse(is.pred == 0 & is.contact_8 > 0, "FN", type),
type = ifelse(is.pred == 0 & is.contact_8 == 0, "TN", type)
)
gg = tbl.anno %>%
filter(is.contact_8>0) %>%
ggplot() +
geom_point(aes(x = V1, y = V2),
alpha = 0.5,
color = "grey",
size = 2) +
geom_point(aes(x = V2, y = V1),
alpha = 0.5,
color = "grey",
size = 2)+
geom_point(data= tbl.anno %>% filter(is.pred>0),
aes(x=V2,y=V1,color=type))+
coord_fixed()+scale_y_reverse()+col_scale
gg
}
df = sel1.data.l.anno$RF02012
plot_contacts(df)

plot_tpfpfn(df)

ggs2 = lapply(1:length(sel1.data.l.anno), function(i){
df = sel1.data.l.anno[[i]]
gg =plot_contacts(df)
rf = names(sel1.data.l.anno)[[i]]
meff = dfinfo$meff[which(dfinfo$rf == rf)]
L = dfinfo$L[which(dfinfo$rf == rf)]
gg = gg+ ggtitle(paste0(rf, " meff: ", meff, " NT: ", L))
return(gg)
})
# gridExtra::marrangeGrob(grobs = ggs, nrow=2,ncol=2)
ggall = c(ggs,ggs2)
for (i in 1:46){
grid.arrange(ggs[[i]],ggs2[[i]],ncol = 2)
}













































